Code
library (here) # path management
# Data wrangling
library (dplyr)
library (tidyr)
library (tibble)
library (stringr)
library (tidytext) # reorder_within
library (effectsize) # Cramer's V
# Ade4
library (ade4)
library (adegraphics)
# Plot
library (ggplot2)
library (patchwork)
library (viridisLite) # Color palette
library (DT) # Display table
# Graph stuff
library (igraph)
library (tidygraph)
library (ggraph)
# Custom package
library (RSnetwork)
set.seed (42 )
# Paths ---
figures_path <- here ("figures/04_data_analysis" )
output_path <- here ("outputs/04_data_analysis" )
Parameters
Code
paste (names (params), params, sep = ": " )
[1] "in_folder: ANDEAN_Peru1" "min_freq: 2"
[3] "presabs: FALSE" "log: FALSE"
[5] "colco: darkred" "colli: cornflowerblue"
[7] "nf_ca: 3" "alpha: 0.05"
[9] "selection: LRT"
Prepare data
Read data
We first read data that was cleaned before (from Dehling et al. (2021 ) ).
Code
#|code-fold: show
in_folder_type <- file.path (here ("outputs/01_clean_data" ))
in_folder_full <- file.path (in_folder_type,
params$ in_folder)
interactions_df <- read.csv (file.path (in_folder_full,
"interactions.csv" ))
plant_traits <- read.csv (file.path (in_folder_full,
"plant_traits.csv" ))
animal_traits <- read.csv (file.path (in_folder_full,
"animal_traits.csv" ))
Filter data
Here we will filter the matrix so that each species interacts with 2 or more other species.
Code
interactions <- filter_matrix (mat = interactions,
thr = params$ min_freq)
Code
animal_traits <- animal_traits[colnames (interactions),]
plant_traits <- plant_traits[rownames (interactions),]
params$presabs is FALSE so we don’t transform the data to presence-absence.
Code
interactions_orig <- interactions
Code
if (params$ presabs) {
interactions <- (interactions != 0 )
interactions <- ifelse (interactions, 1 , 0 )
interactions <- as.data.frame (interactions)
}
params$log is FALSE so we don’t scale the data to log scale.
Code
if (params$ log) {
interactions <- log (interactions + 1 )
}
Save data in a file:
Code
write.csv (interactions, file = file.path (output_path, "interactions.csv" ))
write.csv (animal_traits, file = file.path (output_path, "animal_traits.csv" ))
write.csv (plant_traits, file = file.path (output_path, "plant_traits.csv" ))
Characterize filtered out species
Let’s explore the profile of the birds/plants that were filtered out compared to the profiles of all birds/plants. Here, we look at the column sums (i.e. the marginal abundance). The aim is to see if the removed species were not only interacting with few species, but also rare.
Code
bird_names_out <- colnames (interactions_full)[! (colnames (interactions_full) %in% colnames (interactions))]
bird_deg_out <- colSums (interactions_full[, bird_names_out])
bird_deg_out <- data.frame (degree = bird_deg_out) |>
rownames_to_column ("bird" )
gout <- ggplot (bird_deg_out) +
geom_col (aes (x = reorder (bird, - degree), y = degree),
fill = params$ colco) +
theme_linedraw () +
theme (axis.text.x = element_text (angle = 45 , hjust = 1 )) +
ggtitle ("Filtered out birds abundance" )
bird_deg_full <- colSums (interactions_full)
bird_deg_full <- data.frame (degree = bird_deg_full) |>
rownames_to_column ("bird" )
gall <- ggplot (bird_deg_full) +
geom_col (aes (x = reorder (bird, - degree), y = degree),
fill = params$ colco) +
theme_linedraw () +
theme (axis.text.x = element_text (angle = 45 , hjust = 1 )) +
ggtitle ("Birds abundance" )
gall + gout &
scale_y_log10 (limits = c (1 , max (bird_deg_full$ degree))) &
ylab ("Weighted degree" ) &
theme (axis.title.x = element_blank ())
Code
plant_names_out <- rownames (interactions_full)[! (rownames (interactions_full) %in% rownames (interactions))]
plant_deg_out <- rowSums (interactions_full[plant_names_out, ])
plant_deg_out <- data.frame (degree = plant_deg_out) |>
rownames_to_column ("plant" )
gout <- ggplot (plant_deg_out) +
geom_col (aes (x = reorder (plant, - degree), y = degree),
fill = params$ colli) +
theme_linedraw () +
theme (axis.text.x = element_text (angle = 45 , hjust = 1 )) +
ggtitle ("Filtered out plants abundance" )
plant_deg_full <- rowSums (interactions_full)
plant_deg_full <- data.frame (degree = plant_deg_full) |>
rownames_to_column ("plant" )
gall <- ggplot (plant_deg_full) +
geom_col (aes (x = reorder (plant, - degree), y = degree),
fill = params$ colli) +
theme_linedraw () +
theme (axis.text.x = element_text (angle = 45 , hjust = 1 )) +
ggtitle ("Plant abundance" )
gall + gout & scale_y_log10 (limits = c (1 , max (plant_deg_full$ degree))) &
ylab ("Weighted degree" ) &
theme (axis.title.x = element_blank ())
Display data
The matrix we will analyze has 40 rows (plants) and 53 columns (birds).
Here is the original data matrix:
Code
interactions_df <- matrix_to_df (interactions, colnames = "birds" )
interactions_df_points <- interactions_df |>
filter (value != 0 )
Code
if (params$ log) {
msize <- max (interactions)* 0.7
} else {
msize <- max (interactions)* 0.01
}
breaks <- c (1 , 10 , 100 , 500 )
# Visualize filtered matrix
plot_matrix (interactions,
legend_title = ifelse (params$ log,
"ln(frequency)" , "Frequency" ),
max_size = msize,
base_size = 9 ,
breaks = breaks,
trans = "log1p" ,
xlab = "Birds" ) +
theme (legend.key.width = unit (0.1 , 'cm' ),
axis.line.x.top = element_line (colour = params$ colco,
linewidth = 1 ),
axis.ticks.x.top = element_line (colour = params$ colco),
axis.line.y = element_line (colour = params$ colli,
linewidth = 1 ),
axis.ticks.y = element_line (colour = params$ colli))
Code
# Export plot
ggsave (file.path (figures_path, "matrix.jpeg" ),
width = 14 , height = 12 ,
dpi = 600 , units = "cm" , bg = "white" )
Code
# Create graph object
g <- graph_from_biadjacency_matrix (interactions, weighted = TRUE )
gtbl <- as_tbl_graph (g) |>
activate (nodes) |>
mutate (type = ifelse (type, "bird" , "plant" ))
nodes <- V (g)
# Reorder nodes as the original table
co_reordered <- unname (rank (nodes[colnames (interactions)]))
li_reordered <- unname (rank (nodes[rownames (interactions)]))
# Create centered coordinates
co_y <- scale (co_reordered, center = TRUE ,
scale = FALSE )
li_y <- scale (li_reordered, center = TRUE ,
scale = FALSE )
y <- cbind (c (li_y, co_y))
x <- rep (c (0 , 1 ), c (length (li_y), length (co_y)))
lay <- data.frame (x, y)
Code
gtbl <- gtbl |>
mutate (degree = centrality_degree (weights = weight))
ggraph (gtbl, layout = lay) +
geom_edge_fan (aes (width = weight),
alpha = 0.6 ,
show.legend = FALSE ) +
scale_edge_width (range = c (.2 , 2.5 )) +
scale_color_manual (values = c (params$ colco, params$ colli)) +
geom_node_point (aes (col = type, size = degree),
show.legend = FALSE ) +
scale_size_area (max_size = 3 , trans = "log" ) +
geom_node_text (aes (label = name),
show.legend = FALSE ,
size = 2.5 ,
nudge_x = c (rep (- 0.03 , nrow (interactions)),
rep (0.03 , ncol (interactions)))) +
theme_void ()
Names and codes
Below are the correspondences between species codes and their names:
Code
# Transform for LateX
# Italicize
codes_plants$ ` Species name ` <- paste0 (" \\ textit{" , codes_plants$ ` Species name ` , "}" )
codes_animals$ ` Species name ` <- paste0 (" \\ textit{" , codes_animals$ ` Species name ` , "}" )
# Add newline character
codes_plants[, ncol (codes_plants)] <- paste0 (codes_plants[, ncol (codes_plants)], " \\\\ " )
colnames (codes_plants)[ncol (codes_plants)] <- paste0 (colnames (codes_plants)[ncol (codes_plants)], " \\\\ " )
codes_animals[, ncol (codes_animals)] <- paste0 (codes_animals[, ncol (codes_animals)], " \\\\ " )
colnames (codes_animals)[ncol (codes_animals)] <- paste0 (colnames (codes_animals)[ncol (codes_animals)], " \\\\ " )
Code
write.table (codes_plants,
file = file.path (output_path, "codes_plants.csv" ),
quote = FALSE ,
sep = " & " ,
row.names = FALSE )
write.table (codes_animals,
file = file.path (output_path, "codes_animals.csv" ),
quote = FALSE ,
sep = " & " ,
row.names = FALSE )
Correspondence analysis
Let’s first perform a chi-squared test to check if there is structure in the data.
Code
(chsq <- chisq.test (interactions, simulate.p.value = TRUE ))
Pearson's Chi-squared test with simulated p-value (based on 2000
replicates)
data: interactions
X-squared = 17784, df = NA, p-value = 0.0004998
Code
(V <- cramers_v (interactions))
Cramer's V (adj.) | 95% CI
--------------------------------
0.29 | [0.26, 1.00]
- One-sided CIs: upper bound fixed at [1.00].
The p-value is 5^{-4}.
Now let’s perform CA. For this analysis, we will keep 3 axes.
Code
ca <- dudi.coa (interactions,
scannf = FALSE ,
nf = params$ nf_ca)
CA eigenvalues and square root of eigenvalues:
Code
[1] 6.191934e-01 4.167874e-01 3.928879e-01 3.039191e-01 2.773904e-01
[6] 2.581142e-01 1.784613e-01 1.448607e-01 1.294030e-01 1.157875e-01
[11] 1.027157e-01 8.850992e-02 8.342531e-02 6.946602e-02 5.637214e-02
[16] 4.874483e-02 4.126778e-02 3.824801e-02 3.333744e-02 3.185438e-02
[21] 2.555155e-02 2.300389e-02 2.218657e-02 2.084826e-02 1.716005e-02
[26] 1.435705e-02 1.262480e-02 1.193993e-02 7.404297e-03 5.325839e-03
[31] 4.704812e-03 3.494085e-03 2.855760e-03 2.200572e-03 1.676954e-03
[36] 9.899936e-04 6.284900e-04 3.436072e-04 1.314759e-07
Code
# Square roots of eigenvalues
sqrt (ca$ eig)
[1] 0.7868884498 0.6455907415 0.6268077170 0.5512885639 0.5266786274
[6] 0.5080493838 0.4224467983 0.3806057051 0.3597263539 0.3402755650
[11] 0.3204929191 0.2975061708 0.2888343968 0.2635640656 0.2374281810
[16] 0.2207823099 0.2031447240 0.1955709726 0.1825854443 0.1784779585
[21] 0.1598485306 0.1516703401 0.1489515770 0.1443892510 0.1309963771
[26] 0.1198209041 0.1123601228 0.1092700031 0.0860482228 0.0729783458
[31] 0.0685916339 0.0591107831 0.0534393144 0.0469102545 0.0409506254
[36] 0.0314641633 0.0250697037 0.0185366457 0.0003625961
Below are the corresponding eigenvalues graphs:
Code
plot_eig (ca$ eig) +
ggtitle ("Eigenvalues" ) +
theme (axis.title.y = element_blank ())
Code
plot_eig (sqrt (ca$ eig)) +
ggtitle ("Square roots of eigenvalues" ) +
theme (axis.title.y = element_blank ())
Code
plot_eig (cumsum (ca$ eig)/ sum (ca$ eig)) +
ggtitle ("Cumulative sum of relative eigenvalues" ) +
theme (axis.title.y = element_blank ())
Below is the interaction matrix reordered with the first axis of the CA:
Code
interactions_reordered <- interactions[order (ca$ li[, 1 ]),
order (ca$ co[, 1 ])]
plot_matrix (interactions_reordered,
legend_title = ifelse (params$ log,
"ln(frequency)" , "Frequency" ),
max_size = msize,
base_size = 9 ,
breaks = breaks,
trans = "log1p" ,
xlab = "Birds" ) +
theme (legend.key.width = unit (0.1 , 'cm' ),
axis.line.x.top = element_line (colour = params$ colco,
# lineend = "butt",
linewidth = 1 ),
axis.ticks.x.top = element_line (colour = params$ colco),
axis.line.y = element_line (colour = params$ colli,
# lineend = "square",
linewidth = 1 ),
axis.ticks.y = element_line (colour = params$ colli))
Code
# Export plot
ggsave (file.path (figures_path, "matrix_reordered.jpeg" ),
width = 14 , height = 12 ,
dpi = 600 , units = "cm" , bg = "white" )
Code
# Reorder CA coordinates as the graph nodes
co_reordered <- ca$ co[nodes, 1 ]
co_reordered <- co_reordered[! is.na (co_reordered)]
li_reordered <- ca$ li[nodes, 1 ]
li_reordered <- li_reordered[! is.na (li_reordered)]
# Get the ranks
co_rank <- rank (ca$ co[,1 ])
li_rank <- rank (li_reordered)
# Transformto centered coordinates
co_y <- scale (co_rank, center = TRUE ,
scale = FALSE )
li_y <- scale (li_rank, center = TRUE ,
scale = FALSE )
y2 <- cbind (c (li_y, co_y))
lay2 <- lay |>
mutate (y = y2)
Code
ggraph (gtbl, layout = lay2) +
geom_edge_fan (aes (width = weight),
alpha = 0.6 ,
show.legend = FALSE ) +
scale_edge_width (range = c (.2 , 2.5 )) +
scale_color_manual (values = c (params$ colco, params$ colli)) +
geom_node_point (aes (col = type, size = degree),
show.legend = FALSE ) +
scale_size_area (max_size = 3 , trans = "log" ) +
geom_node_text (aes (label = name),
show.legend = FALSE ,
size = 2.5 ,
nudge_x = c (rep (- 0.03 , nrow (interactions)),
rep (0.03 , ncol (interactions)))) +
theme_void ()
We can also rearrange the interaction matrix to highlight nestedless. For that, we arrange by decreasing weighted degree:
Code
interactions_nested <- interactions[order (rowSums (interactions)),
order (colSums (interactions))]
plot_matrix (interactions_nested,
legend_title = ifelse (params$ log,
"ln(frequency)" , "Frequency" ),
max_size = msize,
base_size = 9 ,
breaks = breaks,
trans = "log1p" ,
xlab = "Birds" ) +
theme (legend.key.width = unit (0.1 , 'cm' ),
axis.line.x.top = element_line (colour = params$ colco,
linewidth = 1 ),
axis.ticks.x.top = element_line (colour = params$ colco),
axis.line.y = element_line (colour = params$ colli,
linewidth = 1 ),
axis.ticks.y = element_line (colour = params$ colli))
We can visualize the coordinates ordered along the axes and scan for gaps (as advised by Dam et al. (2021 ) ):
Code
li_coord <- ca$ li |>
rownames_to_column ("plant" )
li_coord <- li_coord |>
pivot_longer (cols = 2 : ncol (li_coord),
names_to = "axis" )
gli <- ggplot (li_coord) +
geom_point (aes (x = reorder_within (plant, by = value, within = axis,
decreasing = TRUE ),
y = value, group = axis),
col = params$ colli) +
xlab ("Plants" )
Code
co_coord <- ca$ co |>
rownames_to_column ("bird" )
co_coord <- co_coord |>
pivot_longer (cols = 2 : ncol (co_coord),
names_to = "axis" ) |>
mutate (axis = str_replace (axis, "Comp" , "Axis" ))
gco <- ggplot (co_coord) +
geom_point (aes (x = reorder_within (bird, by = value, within = axis,
decreasing = TRUE ),
y = value, group = axis),
col = params$ colco) +
xlab ("Birds" )
Code
gli / gco &
facet_grid (cols = vars (axis), scales = "free" ) &
ylab ("Coordinate" ) &
theme_linedraw () &
theme (axis.text.x = element_blank (),
axis.ticks.x = element_blank ())
To plot the results in a biplot, there are two scalings available. They are depicted below.
Code
xlim <- c (- 6 , 10 )
ylim <- c (- 6 , 10 )
multiplot (indiv_row = ca$ li, indiv_col = ca$ c1,
row_color = params$ colli, col_color = params$ colco, eig = ca$ eig,
max.overlaps = 20 ,
xlim = xlim, ylim = ylim,
title = "Plants at the mean of birds (plants distances conserved)" )
Warning: ggrepel: 31 unlabeled data points (too many overlaps). Consider
increasing max.overlaps
Warning: ggrepel: 22 unlabeled data points (too many overlaps). Consider
increasing max.overlaps
Code
multiplot (indiv_row = ca$ li, indiv_col = ca$ c1,
x = 2 , y = 3 ,
row_color = params$ colli, col_color = params$ colco, eig = ca$ eig,
max.overlaps = 20 ,
xlim = xlim, ylim = ylim,
title = "Plants at the mean of birds (plants distances conserved)" )
Warning: ggrepel: 35 unlabeled data points (too many overlaps). Consider
increasing max.overlaps
Warning: ggrepel: 26 unlabeled data points (too many overlaps). Consider
increasing max.overlaps
Code
xlim <- c (- 6 , 10 )
ylim <- c (- 6 , 10 )
multiplot (indiv_row = ca$ l1, indiv_col = ca$ co,
row_color = params$ colli, col_color = params$ colco, eig = ca$ eig,
xlim = xlim, ylim = ylim,
title = "Birds at the mean of plants (birds distances conserved)" )
Warning: ggrepel: 38 unlabeled data points (too many overlaps). Consider
increasing max.overlaps
Warning: ggrepel: 18 unlabeled data points (too many overlaps). Consider
increasing max.overlaps
Code
multiplot (indiv_row = ca$ l1, indiv_col = ca$ co,
x = 2 , y = 3 ,
xlim = xlim, ylim = ylim,
row_color = params$ colli, col_color = params$ colco, eig = ca$ eig,
title = "Birds at the mean of plants (birds distances conserved)" )
Warning: ggrepel: 43 unlabeled data points (too many overlaps). Consider
increasing max.overlaps
Warning: ggrepel: 21 unlabeled data points (too many overlaps). Consider
increasing max.overlaps
Reciprocal scaling
We will now perform reciprocal scaling (Thioulouse and Chessel 1992 ) .
Code
recscal <- reciprocal.coa (ca)
Code
multiplot (indiv_row = recscal,
indiv_row_lab = paste (recscal$ Row, recscal$ Col, sep = "-" ),
eig = ca$ eig,
alphapoints = 0.5 )
Warning: ggrepel: 330 unlabeled data points (too many overlaps). Consider
increasing max.overlaps
This graph shows the correspondences of species interactions in the multivariate space. The correspondences are related to the CA coordinates as follows (Thioulouse and Chessel 1992 ) :
\[h_k(i, j) = \frac{u^\star_{ik} + v^\star_{jk}}{\sqrt{2 \lambda_k \mu_k}}\]
To put it differently, each correspondence is at the “mean” position between the li(\(L_k(i)\) ) and the co (scaled with a scaling factor).
The standard deviations for plant species \(i\) is (Thioulouse and Chessel 1992 ) :
\[s_{ik} = \sqrt{\frac{1}{2\lambda_k\mu_k} \left( \frac{1}{y_{i \cdot}} \sum_{j = 1}^c \left(y_{ij} {v^\star_{jk}}^2 \right) - \lambda_k {u^\star_{ik}}^2 \right)}\]
And the standard deviations for bird species \(j\) is (Thioulouse and Chessel 1992 ) :
\[s_{jk} = \sqrt{\frac{1}{2\lambda_k\mu_k} \left( \frac{1}{y_{\cdot j}} \sum_{i = 1}^r \left(y_{ij} {u^\star_{ik}}^2 \right) - \lambda_k {v^\star_{jk}}^2 \right)}\] NB: these standard deviations can also be computed directly as the standard deviations of the \(h_k(i, j)\) .
The graphs below show plants and birds interaction niches in the multivariate space.
Code
# Create viridis palette for rows
colrow <- viridis (nrow (interactions), option = "G" , end = 0.9 )
# Order palette so that colors match position on axis 1
colrow <- colrow[rank (ca$ li[[1 ]])]
# png(file.path(figures_path, "plant_ell.png"),
# bg = "white",
# res = 600,
# width = 15, height = 15, units = "cm")
plot_reciprocal (recscal = recscal,
dudi = ca,
col = colrow,
group = "li" ,
xax = 1 , yax = 2 )
Code
# dev.off()
plot_reciprocal (recscal = recscal,
dudi = ca,
col = colrow,
group = "li" ,
xax = 2 , yax = 3 )
Code
# Create viridis palette for columns
colco <- viridis (ncol (interactions), option = "F" , end = 0.9 )
# Order palette so that colors match position on axis 1
colco <- colco[rank (ca$ co[[1 ]])]
# png(file.path(figures_path, "birds_ell.png"),
# bg = "white",
# res = 600,
# width = 15, height = 15, units = "cm")
plot_reciprocal (recscal = recscal,
dudi = ca,
group = "co" ,
xax = 1 , yax = 2 ,
col = colco)
Code
# dev.off()
plot_reciprocal (recscal = recscal,
dudi = ca,
group = "co" ,
xax = 2 , yax = 3 ,
col = colco)
Linear model
Here we try to model niche width as a function of the position in the axes of the multivariate analysis.
A linear model is also constructed and chosen as the best model between \[s_k = a~m_k + c \] and \[s_k = a~m_k + b~m_k^2 + c\]
Code
ts <- 2.5 # labels size
ps <- 1 # points size
ax <- 1 : 3
lmplots <- list ()
gbirds2 <- list ()
gplants2 <- list ()
lmbirds <- vector (mode = "list" , length = length (ax))
names (lmbirds) <- paste0 ("ax" , ax, "_birds" )
lmplants <- vector (mode = "list" , length = length (ax))
names (lmplants) <- paste0 ("ax" , ax, "_plants" )
msd <- get_mean_sd (recscal, ax = 1 : params$ nf_ca)
Code
for (a in ax){
# Get the dataframes with mean/standard deviation for a given axis
msd_birds <- data.frame (birds = rownames (msd$ colsd),
mean = msd$ colmean[, a],
sd = msd$ colsd[, a])
msd_plants <- data.frame (plants = rownames (msd$ rowsd),
mean = msd$ rowmean[, a],
sd = msd$ rowsd[, a])
# Linear models for birds ---
# Construct models
lmsimple <- lm (sd ~ mean, msd_birds)
lm2 <- lm (sd ~ mean + I (mean^ 2 ), msd_birds)
# Get best model for birds
lmb <- get_best_model (lmsimple, lm2, alpha = params$ alpha)
lab_birds <- lm_labels (lmb, a)
# Get predictions
pred <- "confidence"
birds_pred <- get_pred (dat_predict = msd_birds$ mean, lmb,
interval = pred, level = 1 - params$ alpha)
# Linear models for plants ---
# Construct models
lmsimple <- lm (sd ~ mean, msd_plants)
lm2 <- lm (sd ~ mean + I (mean^ 2 ), msd_plants)
# Get best model for plants
lmp <- get_best_model (lmsimple, lm2, alpha = params$ alpha)
lab_plants <- lm_labels (lmp, a)
# Get predictions
plants_pred <- get_pred (dat_predict = msd_plants$ mean, lmp,
interval = pred, level = 1 - params$ alpha)
# Merge datasets ---
msd_plants <- msd_plants |>
rename ("species" = "plants" ) |>
mutate (type = "Plants" )
msd_birds <- msd_birds |>
rename ("species" = "birds" ) |>
mutate (type = "Birds" )
msd_merge <- rbind (msd_plants, msd_birds)
plants_pred <- plants_pred |>
mutate (type = "Plants" )
birds_pred <- birds_pred |>
mutate (type = "Birds" )
pred <- rbind (plants_pred, birds_pred)
lab_df <- rbind (lab_birds, lab_plants)
lab_df$ type <- c ("Birds" , "Plants" )
# Plots sd versus mean ---
lmplots[[a]] <- plot_lm_mean_sd (msd_merge,
dat_pred = pred,
text_size = ts,
points_size = ps,
nudge_x = 0.1 ,
col = c (params$ colco, params$ colli),
lab = lab_df,
ylab = paste0 ("Niche breadth (standard deviation on axis " , a, ")" ),
xlab = paste0 ("Niche optimum (mean position on axis " , a, ")" ),
facet = "type" )
lmbirds[[a]] <- lmb
lmplants[[a]] <- lmp
# Plot sd versus number of interactions ---
gb2 <- ggplot () +
geom_point (aes (y = msd_birds$ sd, x = colSums (interactions != 0 )),
col = params$ colco, alpha = 0.5 ) +
theme_linedraw () +
ggtitle (paste ("Bird niche width versus number of interacting partners on axis" , a)) +
xlim (c (0 , max (colSums (interactions != 0 ))))
gp2 <- ggplot () +
geom_point (aes (y = msd_plants$ sd, x = rowSums (interactions != 0 )),
col = params$ colli, alpha = 0.5 ) +
theme_linedraw () +
ggtitle (paste ("Plant niche width versus number of interacting partners on axis" , a)) +
xlim (c (0 , max (rowSums (interactions != 0 ))))
gbirds2 <- c (gbirds2, list (gb2))
gplants2 <- c (gplants2, list (gp2))
}
Code
lmplots[[1 ]] +
ylim (0 , 1.8 ) +
theme (plot.margin = margin (5 , 10 , 0 , 5 ),
text = element_text (size = ts* 3.5 ))
Code
# Export plot
ggsave (file.path (figures_path, "lm_ax1.jpeg" ),
width = 18 , height = 8 ,
dpi = 600 , units = "cm" , bg = "white" )
lmplots[[2 ]] +
theme (plot.margin = margin (5 , 10 , 0 , 5 ),
text = element_text (size = ts* 3.5 ))
Code
# Export plot
ggsave (file.path (figures_path, "lm_ax2.jpeg" ),
width = 18 , height = 8 ,
dpi = 600 , units = "cm" , bg = "white" )
lmplots[[3 ]] +
theme (plot.margin = margin (5 , 10 , 0 , 5 ),
text = element_text (size = ts* 3.5 ))
Relationship with traits
In order to interpret the axes, we can compute the correlations between the traits tables and the coordinates of birds/plants on each axis.
Code
corli <- cor (ca$ li, plant_traits)
corli <- t (corli)
Code
corco <- cor (ca$ co, animal_traits)
corco <- t (corco)
Below is the a posteriori correlation circle between the axes and the traits:
Code
rownames (corli) <- c ("fruit diameter" , "plant height" , "crop mass" )
rownames (corco) <- c ("Kipp's index" , "bill width" , "body mass" )
plot_corcircle (cor = corli,
col = params$ colli,
cor2 = corco,
lty = "longdash" ,
lty2 = "solid" ,
col2 = params$ colco,
eig = ca$ eig)
Code
ggsave (file.path (figures_path, "corcircle.svg" ),
width = 15 , height = 13 , units = "cm" )
plot_corcircle (cor = corli,
col = params$ colli,
xax = 2 , yax = 3 ,
cor2 = corco,
lty = "solid" ,
lty2 = "longdash" ,
col2 = params$ colco,
eig = ca$ eig)
Code
corli |>
DT:: datatable () |>
DT:: formatRound (1 : ncol (corli), digits = 2 )
Code
corco |>
DT:: datatable () |>
DT:: formatRound (1 : ncol (corco), digits = 2 )
Linear model with residuals
In this section, we construct linear models to predict the residuals of the regression of multivariate coordinates on known species traits. The aim is to look for a missing trait by examining the relationship between residuals and multivariate coordinates.
Plants (rows)
We first construct the linear model predicting plants CA coordinates from their traits.
Code
lm_li <- apply (ca$ li, MARGIN = 2 ,
function (li) lm (li ~ plant_traits$ meanHeight + plant_traits$ CropMass + plant_traits$ meanD1))
names (lm_li) <- paste0 ("Ax" , 1 : length (lm_li))
lapply (lm_li, summary)
$Ax1
Call:
lm(formula = li ~ plant_traits$meanHeight + plant_traits$CropMass +
plant_traits$meanD1)
Residuals:
Min 1Q Median 3Q Max
-4.2745 -1.2198 -0.3298 0.3438 5.3451
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.6364528 1.1618793 -1.408 0.1676
plant_traits$meanHeight 0.1267155 0.1265614 1.001 0.3234
plant_traits$CropMass -0.0002272 0.0003218 -0.706 0.4847
plant_traits$meanD1 0.2932961 0.1257258 2.333 0.0254 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 2.093 on 36 degrees of freedom
Multiple R-squared: 0.2388, Adjusted R-squared: 0.1754
F-statistic: 3.765 on 3 and 36 DF, p-value: 0.01895
$Ax2
Call:
lm(formula = li ~ plant_traits$meanHeight + plant_traits$CropMass +
plant_traits$meanD1)
Residuals:
Min 1Q Median 3Q Max
-1.6708 -0.7660 -0.2094 0.3716 4.1495
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.3079644 0.6397991 2.044 0.0483 *
plant_traits$meanHeight -0.1506873 0.0696922 -2.162 0.0373 *
plant_traits$CropMass -0.0001924 0.0001772 -1.086 0.2848
plant_traits$meanD1 0.0598534 0.0692320 0.865 0.3930
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.152 on 36 degrees of freedom
Multiple R-squared: 0.2684, Adjusted R-squared: 0.2075
F-statistic: 4.403 on 3 and 36 DF, p-value: 0.00974
$Ax3
Call:
lm(formula = li ~ plant_traits$meanHeight + plant_traits$CropMass +
plant_traits$meanD1)
Residuals:
Min 1Q Median 3Q Max
-2.3713 -0.8794 -0.2794 0.6769 4.8751
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.8489504 0.7363341 2.511 0.0167 *
plant_traits$meanHeight -0.1424334 0.0802076 -1.776 0.0842 .
plant_traits$CropMass 0.0003261 0.0002039 1.599 0.1186
plant_traits$meanD1 -0.1084142 0.0796780 -1.361 0.1821
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.326 on 36 degrees of freedom
Multiple R-squared: 0.1145, Adjusted R-squared: 0.04072
F-statistic: 1.552 on 3 and 36 DF, p-value: 0.2179
Then, we pot the relationship between CA coordinates and the residuals of these models.
Code
res_list <- lapply (seq_along (lm_li),
function (i) data.frame (residuals = residuals (lm_li[[i]]), coord = ca$ li[, i]))
for (i in 1 : length (res_list)) {
dfi <- res_list[[i]]
gp <- ggplot (dfi, aes (x = coord, y = residuals)) +
geom_point (col = params$ colli) +
geom_smooth (method = "lm" , col = params$ colli) +
theme_linedraw () +
ggtitle (paste ("Axis" , i))
print (gp)
}
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
Birds (columns)
We do the same for birds.
Code
lm_co <- apply (ca$ co, MARGIN = 2 ,
function (co) lm (co ~ animal_traits$ Bodymass + animal_traits$ BillWidth + animal_traits$ KippsIndex))
names (lm_co) <- paste0 ("Ax" , 1 : length (lm_co))
lapply (lm_co, summary)
$Ax1
Call:
lm(formula = co ~ animal_traits$Bodymass + animal_traits$BillWidth +
animal_traits$KippsIndex)
Residuals:
Min 1Q Median 3Q Max
-2.8305 -0.4059 -0.0376 0.3589 4.0612
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -4.187964 0.558502 -7.499 1.12e-09 ***
animal_traits$Bodymass -0.002276 0.001405 -1.620 0.11169
animal_traits$BillWidth 0.323581 0.039214 8.252 7.90e-11 ***
animal_traits$KippsIndex 7.605599 2.272965 3.346 0.00158 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.036 on 49 degrees of freedom
Multiple R-squared: 0.7033, Adjusted R-squared: 0.6851
F-statistic: 38.71 on 3 and 49 DF, p-value: 5.669e-13
$Ax2
Call:
lm(formula = co ~ animal_traits$Bodymass + animal_traits$BillWidth +
animal_traits$KippsIndex)
Residuals:
Min 1Q Median 3Q Max
-2.0904 -0.4652 -0.0623 0.2344 3.5515
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.6291476 0.5139784 3.170 0.002630 **
animal_traits$Bodymass 0.0001647 0.0012931 0.127 0.899161
animal_traits$BillWidth 0.0372378 0.0360882 1.032 0.307206
animal_traits$KippsIndex -8.5095565 2.0917654 -4.068 0.000172 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.9534 on 49 degrees of freedom
Multiple R-squared: 0.2691, Adjusted R-squared: 0.2244
F-statistic: 6.014 on 3 and 49 DF, p-value: 0.001427
$Ax3
Call:
lm(formula = co ~ animal_traits$Bodymass + animal_traits$BillWidth +
animal_traits$KippsIndex)
Residuals:
Min 1Q Median 3Q Max
-1.7002 -0.4934 -0.3312 0.2064 3.9726
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.476814 0.554260 2.664 0.0104 *
animal_traits$Bodymass 0.002303 0.001394 1.652 0.1050
animal_traits$BillWidth -0.056295 0.038916 -1.447 0.1544
animal_traits$KippsIndex -3.628446 2.255700 -1.609 0.1141
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.028 on 49 degrees of freedom
Multiple R-squared: 0.1273, Adjusted R-squared: 0.07385
F-statistic: 2.382 on 3 and 49 DF, p-value: 0.08075
Code
res_list <- lapply (seq_along (lm_co),
function (i) data.frame (residuals = residuals (lm_co[[i]]), coord = ca$ co[, i]))
for (i in 1 : length (res_list)) {
dfi <- res_list[[i]]
gp <- ggplot (dfi, aes (x = coord, y = residuals)) +
geom_point (col = params$ colco) +
geom_smooth (method = "lm" , col = params$ colco) +
theme_linedraw () +
ggtitle (paste ("Axis" , i))
print (gp)
}
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
References
Dam, Alje van, Mark Dekker, Ignacio Morales-Castilla, Miguel á. Rodríguez, David Wichmann, and Mara Baudena. 2021.
“Correspondence Analysis, Spectral Clustering and Graph Embedding: Applications to Ecology and Economic Complexity.” Scientific Reports 11 (1): 8926.
https://doi.org/10.1038/s41598-021-87971-9 .
Dehling, D. Matthias, Irene M. A. Bender, Pedro G. Blendinger, Marcia C. Muñoz, Marta Quitián, Francisco Saavedra, Vinicio Santillán, Katrin Böhning-Gaese, Eike-Lena Neuschulz, and Matthias Schleuning. 2021.
“ANDEAN Frugivory: Data on Plant bird Interactions and Functional Traits of Plant and Bird Species from Montane Forests Along the Andes.” https://doi.org/10.5061/DRYAD.WM37PVMN5 .
Thioulouse, Jean, and Daniel Chessel. 1992.
“A Method for Reciprocal Scaling of Species Tolerance and Sample Diversity.” Ecology 73 (2): 670–80.
https://doi.org/10.2307/1940773 .